# ======================================================================================= #
# #Enns, Kelly, Masaki, and Wohlfarth (EKMW)
# #The following code is identical to that produced by Lebo and Kraft excpet
# #where explicitly noted.
# Lebo/Kraft 2017. "The General Error Correction Model in Practice" (Research & Politics) #
# Auxiliary functions used in analyses.R                                                  #
# ======================================================================================= #

## Load required packages
library(fracdiff)
library(tseries)
## EKMW load vrtest, urca
library(vrtest)
library(urca)

## Estimate multivariate GECM
gecmEst <- function(y, x){
  # y: dependent variable (vector)
  # x: independent variables (matrix)
  x  <- as.matrix(x)
  dy <- diff(y)
  ly <- y[-length(y)]
  dx <- diff(x)
  colnames(dx) <- paste0("d",colnames(dx))
  lx <- as.matrix(x[-nrow(x),])
  colnames(lx) <- paste0("l",colnames(lx))
  iv_ <- cbind(ly,dx,lx)
  lm(dy ~ iv_)
}

#########################################
## In order to correctly identify the lag length for the ADF EKMW generate a
## function that sequentially adds lags in the ADF test until autocorrelation is removed
adf.test.modified <- function(y){
 r <- ur.df(y, type = c("drift"), lags = 0)@testreg$residuals
 #Q test for white noice in residuals
 i <- Auto.Q(r)$Pvalue
 j <- 0
 while(i < 0.05) {
   j <- j + 1
   r <- ur.df(y, type = c("drift"), lags = j)@testreg$residuals
   i <- Auto.Q(r)$Pvalue
 }
 ur.df(y, type = c("drift"), lags = j)
}
#########################################


## Simulate time series varying d or rho (uses arima.sim)
tsSim <- function(n = 50, par = "d", val){
  # n:    length of series
  # par:  parameter to be set, either "d" or "rho"
  # val:  value for par
  if(par == "d"){
    if(val>=0 & val<.5) x <- fracdiff.sim(n=n, d=val)$series
    else if(val>=.5 & val<=1) x <- cumsum(fracdiff.sim(n=n, d=(val-1))$series)
    else stop("d parameter out of range")
  } else if(par == "rho"){
    if(val>0 & val<1) x <- as.vector(arima.sim(n = n, list(ar = val)))
    else if(val==0) x <- rnorm(n)
    else if(val==1) x <- cumsum(rnorm(n))
  } else stop("par has to be either 'd' or 'rho'")
  if(exists("x")) return(x)
}

## EKMW added the following line below: dfk <- sapply(Y, function(y) apply(y, 2, function(z) adf.test(z)$parameter)) so as to keep track of
## how many lags are included in the ADF test
## EKMW also added lx_t and dx_t so the relationship between X and Y in the GECM could be considered.

## Simulate Dickey-Fuller and alpha*_1 values for unrelated stationary series
simDfAlpha <- function(nsim = 1000, nstep = 10, nt = 50, par = "d"){
  # nsim:   number of simulations per scenario
  # nstep:  number of steps in parameter (d for now)
  # nt:     number of time points
  # par:    parameter to be varied, can be either "d" or "rho"
  steps <- seq(0,.9,length.out=nstep)
  X <- lapply(steps, function(x) replicate(nsim, tsSim(n=nt, par=par, val=x)))
  Y <- lapply(steps, function(x) replicate(nsim, tsSim(n=nt, par=par, val=x)))
  dfval <- sapply(Y, function(y) apply(y, 2, function(z) adf.test(z)$statistic))
  dfk <- sapply(Y, function(y) apply(y, 2, function(z) adf.test(z)$parameter))
  alpha1 <- sapply(1:length(steps), function(i) sapply(1:nsim, function(j) 
    coef(gecmEst(Y[[i]][,j], X[[i]][,j]))["iv_ly"]))
  alpha1_t <- sapply(1:length(steps), function(i) sapply(1:nsim, function(j) 
    summary(gecmEst(Y[[i]][,j], X[[i]][,j]))$coefficients["iv_ly","t value"]))
  lx_t <- sapply(1:length(steps), function(i) sapply(1:nsim, function(j) 
    summary(gecmEst(Y[[i]][,j], X[[i]][,j]))$coefficients["iv_l","t value"]))
  dx_t <- sapply(1:length(steps), function(i) sapply(1:nsim, function(j) 
    summary(gecmEst(Y[[i]][,j], X[[i]][,j]))$coefficients["iv_d","t value"]))
  out <- data.frame(dfval = as.vector(dfval)
                    , dfk = as.vector(dfk)
                    , alpha1 = as.vector(alpha1), alpha1_t = as.vector(alpha1_t)
                    , lx_t = as.vector(lx_t), dx_t = as.vector(dx_t)
                    , val = rep(steps, each=nsim), par = par, nt = as.factor(nt))
  return(out)
}


##################################
## EKMW replicate simDfAlpha but choose a lag length based on autocorrelation in residuals.
simDfAlpha_k0 <- function(nsim = 1000, nstep = 10, nt = 50, par = "d"){
  steps <- seq(0,.9,length.out=nstep)
  X <- lapply(steps, function(x) replicate(nsim, tsSim(n=nt, par=par, val=x)))
  Y <- lapply(steps, function(x) replicate(nsim, tsSim(n=nt, par=par, val=x)))
  adfval_k0 <- sapply(Y, function(y) apply(y, 2, function(z) ur.df(z, type = c("drift"), lags = 0)@teststat[1]))
  dfval <- sapply(Y, function(y) apply(y, 2, function(z) adf.test.modified(z)@teststat[1]))
  dfk <- sapply(Y, function(y) apply(y, 2, function(z) adf.test.modified(z)@lags))
  dfcval <- sapply(Y, function(y) apply(y, 2, function(z) adf.test.modified(z)@cval[1,2]))
  alpha1 <- sapply(1:length(steps), function(i) sapply(1:nsim, function(j) 
    coef(gecmEst(Y[[i]][,j], X[[i]][,j]))["iv_ly"]))
  alpha1_t <- sapply(1:length(steps), function(i) sapply(1:nsim, function(j) 
    summary(gecmEst(Y[[i]][,j], X[[i]][,j]))$coefficients["iv_ly","t value"]))
  lx_t <- sapply(1:length(steps), function(i) sapply(1:nsim, function(j) 
    summary(gecmEst(Y[[i]][,j], X[[i]][,j]))$coefficients["iv_l","t value"]))
  dx_t <- sapply(1:length(steps), function(i) sapply(1:nsim, function(j) 
    summary(gecmEst(Y[[i]][,j], X[[i]][,j]))$coefficients["iv_d","t value"]))
  out <- data.frame(dfval = as.vector(dfval)
                    , dfk = as.vector(dfk), dfcval = as.vector(dfcval) 
                    , alpha1 = as.vector(alpha1), alpha1_t = as.vector(alpha1_t)
                    , lx_t = as.vector(lx_t), dx_t = as.vector(dx_t)
                    , val = rep(steps, each=nsim), par = par, nt = as.factor(nt))
  return(out)
}
##################################


## Compute MacKinnon critical values
macKinnon <- function(t){
  -3.2145 - 3.21/t - 2.0/t^2 + 17/t^3
}
